# package.list <- c( "xmltools","purrr","xml2","dplyr","pander","XML" )
# install.packages( package.list )
library( xmltools ) # xml tools
library( xml2 ) # xml tools
library( XML ) # xml tools
library( dplyr ) # data wrangling
library( purrr ) # data wrangling
library( pander ) # pretty tables
library( DiagrammeR ) # network diagrams Demo XML data from Part V of Schedule H - the table of facilities.
sierraclub<-
"<SIERRACLUB>
<Section527PoliticalOrgGrp>
<OrganizationBusinessName>
<BusinessNameLine1Txt>THE SIERRA CLUB VOTER EDUCATION FUND</BusinessNameLine1Txt>
</OrganizationBusinessName>
<USAddress>
<AddressLine1Txt>2101 WEBSTER STREET SUITE 1300</AddressLine1Txt>
<CityNm>OAKLAND</CityNm>
<StateAbbreviationCd>CA</StateAbbreviationCd>
<ZIPCd>94612</ZIPCd>
</USAddress>
<EIN>943244759</EIN>
<ContributionsRcvdDlvrAmt>241025</ContributionsRcvdDlvrAmt>
</Section527PoliticalOrgGrp>
</SIERRACLUB>"doc <- xml2::read_xml( sierraclub )
nodeset <- xml2::xml_children( doc ) # top level nodeset
xmltools::xml_view_trees( doc )## ++-- Section527PoliticalOrgGrp
## +-- EIN
## +-- ContributionsRcvdDlvrAmt
## +-- OrganizationBusinessName
## ++-- BusinessNameLine1Txt
## ++-- USAddress
## +-- AddressLine1Txt
## +-- CityNm
## +-- StateAbbreviationCd
## ++-- ZIPCd
doc <- xml2::read_xml(sierraclub)
# nodeset is a group of nodes returned
# from the previous step
nodeset_to_df <- function( nodeset )
{
node_to_df <- function(x)
{
vec <- xml_text(xml_children(x))
names(vec) <- xml_name(xml_children(x))
df <- data.frame( as.list( vec ), stringsAsFactors=F )
return( df )
}
node.list <- lapply( nodeset, node_to_df )
df <- bind_rows( node.list )
return( df )
}
# xml2::xml_structure( doc )
# nodeset <- xml2::xml_children( doc )
# nodeset_to_df( nodeset )
group.name.1 <- "//Section527PoliticalOrgGrp"
group.name.2 <- "//Sec527PolOrgs"
group.name.3 <- "//Form990ScheduleCPartI/Sec527PolOrgs"
all.versions <- paste( group.name.1, group.name.2,group.name.3, sep="|" )
# sierra club
V1 <- "https://s3.amazonaws.com/irs-form-990/201843129349301954_public.xml"
doc <- read_xml( V1 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions )
nodeset_to_df( node.group ) %>% pander()| OrganizationBusinessName | USAddress | EIN |
|---|---|---|
| THE SIERRA CLUB VOTER EDUCATION FUND | 2101 WEBSTER STREET SUITE 1300OAKLANDCA94612 | 943244759 |
| ContributionsRcvdDlvrAmt |
|---|
| 241025 |
# load concordance for schedule h
#dropbox.url <- "https://www.dropbox.com/s/k2tohl0ik4odjii/schedule-h-v1.csv?dl=1"
dropbox <- "https://www.dropbox.com/s/z00h5n2eimjhuyl/schedule-c.csv?dl=1"
sched.c <- read.csv( file=dropbox )
head( sched.c ) %>% pander()| xpath |
|---|
| /Return/ReturnData/IRS990ScheduleC/Form990ScheduleCPartI/PoliticalExpenditures |
| /Return/ReturnData/IRS990ScheduleC/PoliticalExpenditures |
| /Return/ReturnData/IRS990ScheduleC/PoliticalExpendituresAmt |
| /Return/ReturnData/IRS990ScheduleC/Form990ScheduleCPartI/VolunteerHours |
| /Return/ReturnData/IRS990ScheduleC/VolunteerHours |
| /Return/ReturnData/IRS990ScheduleC/VolunteerHoursCnt |
| variable_name | description | location_code_xsd |
|---|---|---|
| SC_01_POLI_EXP | Political expenditures | [PoliticalExpenditures] Part I-A Line 2 |
| SC_01_POLI_EXP | Political expenditures | [PoliticalExpenditures] Part I-A Line 2 |
| SC_01_POLI_EXP | Political expenditures | [PoliticalExpendituresAmt] Part I-A Line 2 |
| SC_01_POLI_VOL_HOURS | Volunteer hours | [VolunteerHoursCnt] Part I-A Line 3 |
| SC_01_POLI_VOL_HOURS | Volunteer hours | [VolunteerHours] Part I-A Line 3 |
| SC_01_POLI_VOL_HOURS | Volunteer hours | [VolunteerHoursCnt] Part I-A Line 3 |
| location_code | form | form_type | form_part | form_line_number |
|---|---|---|---|---|
| SCHED-C-PART-01-A-LINE-02 | SCHED-C | SZ | PART-01-A | Line 02 |
| SCHED-C-PART-01-A-LINE-02 | SCHED-C | SZ | PART-01-A | Line 02 |
| SCHED-C-PART-01-A-LINE-02 | SCHED-C | SZ | PART-01-A | Line 02 |
| SCHED-C-PART-01-A-LINE-03 | SCHED-C | SZ | PART-01-A | Line 03 |
| SCHED-C-PART-01-A-LINE-03 | SCHED-C | SZ | PART-01-A | Line 03 |
| SCHED-C-PART-01-A-LINE-03 | SCHED-C | SZ | PART-01-A | Line 03 |
| variable_scope | data_type_xsd | data_type_simple | rdb_relationship |
|---|---|---|---|
| PZ | numeric | ONE | |
| PZ | USAmountType | numeric | ONE |
| PZ | USAmountType | numeric | ONE |
| PZ | numeric | ONE | |
| PZ | IntegerNNType | numeric | ONE |
| PZ | IntegerNNType | numeric | ONE |
| rdb_table | required |
|---|---|
| SC-P01-T00-LOBBY | NA |
| SC-P01-T00-LOBBY | NA |
| SC-P01-T00-LOBBY | NA |
| SC-P01-T00-LOBBY | NA |
| SC-P01-T00-LOBBY | NA |
| SC-P01-T00-LOBBY | NA |
| versions |
2009v1.0;2009v1.1;2009v1.2;2009v1.3;2009v1.4;2009v1.7;2010v3.2;2010v3.4;2010v3.6;2010v3.7;2011v1.2;2011v1.3;2011v1.4;2011v1.5;2012v2.0;2012v2.1;2012v2.2;2012v2.3;2012v3.0
2013v3.0;2013v3.1;2013v4.0;2014v5.0;2014v6.0;2015v2.0;2015v2.1;2015v3.0;2016v3.0
2009v1.0;2009v1.1;2009v1.2;2009v1.3;2009v1.4;2009v1.7;2010v3.2;2010v3.4;2010v3.6;2010v3.7;2011v1.2;2011v1.3;2011v1.4;2011v1.5;2012v2.0;2012v2.1;2012v2.2;2012v2.3;2012v3.0
| latest_version | duplicated | current_version | production_rule | validated |
|---|---|---|---|---|
| NA | NA | NA | NA | NA |
| 2012 | FALSE | TRUE | NA | NA |
| 2016 | FALSE | TRUE | NA | NA |
| NA | NA | NA | NA | NA |
| 2012 | FALSE | TRUE | NA | NA |
| 2016 | FALSE | TRUE | NA | NA |
# remove leading forward slash
# though we need to add it back below
xpaths <- sched.c$xpath
xpaths <- gsub( "/Return/", "Return/", xpaths )SC-P01-T00-LOBBY
SC-P01-T01-POLITICAL-ORGS-INFO
table( sched.c$rdb_table )##
## SC-P01-T00-LOBBY SC-P01-T01-POLITICAL-ORGS-INFO
## 30 61
## SC-P02-T00-LOBBY SC-P03-T00-LOBBY
## 194 30
## SC-P04-T99-SUPPLEMENTAL-INFO
## 6
Get xpaths for the one-to-many tables:
xpaths.t1 <- xpaths[ sched.c$rdb_table == "SC-P01-T01-POLITICAL-ORGS-INFO" ]
xpaths.t2 <- xpaths[ sched.c$rdb_table == "SC-P04-T99-SUPPLEMENTAL-INFO" ]nodes <- strsplit( xpaths.t1, "/" )
xpath.levels <- sapply( nodes, length )
max.lev <- max( xpath.levels )
nodes <- lapply( nodes, function(x){ c(x, rep("",max.lev-length(x) ) ) } )
# compare xpath levels
xpath.levels <- data.frame( do.call( cbind, nodes ), stringsAsFactors=F )
xpath.levels[ , 1:10 ]%>% pander| X1 | X2 | X3 |
|---|---|---|
| Return | Return | Return |
| ReturnData | ReturnData | ReturnData |
| IRS990ScheduleC | IRS990ScheduleC | IRS990ScheduleC |
| Form990ScheduleCPartI | Sec527PolOrgs | Section527PoliticalOrgGrp |
| Sec527PolOrgs | NameOf527Organization | OrganizationBusinessName |
| NameOf527Organization | BusinessNameLine1 | BusinessNameLine1 |
| BusinessNameLine1 |
| X4 | X5 | X6 |
|---|---|---|
| Return | Return | Return |
| ReturnData | ReturnData | ReturnData |
| IRS990ScheduleC | IRS990ScheduleC | IRS990ScheduleC |
| Section527PoliticalOrgGrp | Form990ScheduleCPartI | Sec527PolOrgs |
| OrganizationBusinessName | Sec527PolOrgs | NameOf527Organization |
| BusinessNameLine1Txt | NameOf527Organization | BusinessNameLine2 |
| BusinessNameLine2 |
| X7 | X8 | X9 |
|---|---|---|
| Return | Return | Return |
| ReturnData | ReturnData | ReturnData |
| IRS990ScheduleC | IRS990ScheduleC | IRS990ScheduleC |
| Section527PoliticalOrgGrp | Section527PoliticalOrgGrp | Form990ScheduleCPartI |
| OrganizationBusinessName | OrganizationBusinessName | Sec527PolOrgs |
| BusinessNameLine2 | BusinessNameLine2Txt | AddressForeign |
| City |
| X10 |
|---|
| Return |
| ReturnData |
| IRS990ScheduleC |
| Form990ScheduleCPartI |
| Sec527PolOrgs |
| AddressUS |
| City |
This one works well but only works on active xpaths in a specific XML document. We can’t use it with the list of raw xpaths from the concordance file.
## we can identify grouping variables
doc <- xml2::read_xml( sierraclub )
## get all xpaths
all.xpaths <- doc %>%
xml_get_paths()
## collapse xpaths to unique only
unique.xpaths <- all.xpaths %>%
unlist() %>%
unique()
## but what we really want is the parent node of terminal nodes.
## use the `only_terminal_parent = TRUE` to do this
## get all xpaths to parents of parent node
groups.all <- doc %>%
xml_get_paths( only_terminal_parent = TRUE )
## collapse xpaths to unique only
groups <- groups.all %>%
unlist() %>%
unique()
groups## [1] "/SIERRACLUB/Section527PoliticalOrgGrp"
## [2] "/SIERRACLUB/Section527PoliticalOrgGrp/OrganizationBusinessName"
## [3] "/SIERRACLUB/Section527PoliticalOrgGrp/USAddress"
This will return all variables at the first level where the nodes split. Some will be grouping variables, and some will be regular variables. Have to inspect to see.
I need to improve this to return only grouping variables, if possible.
find_group_name <- function( xpaths )
{
nodes <- strsplit( xpaths, "/" )
d1 <- data.frame( do.call( cbind, nodes ), stringsAsFactors=F )
not.equal <- apply( d1, MARGIN=1, FUN=function(x){ length( unique( x )) > 1 } )
this.one <- which( not.equal == T )[ 1 ]
group.names <- d1[ this.one, ] %>% as.character() %>% unique()
return( group.names )
}
find_group_name( xpaths.t1 ) ## [1] "Form990ScheduleCPartI" "Sec527PolOrgs"
## [3] "Section527PoliticalOrgGrp"
find_group_name( xpaths.t2 ) ## [1] "Form990ScheduleCPartIV" "SupplementalInformationDetail"
# node groups for Schedule C Part I-C Line 5
group.name.1 <- "//Section527PoliticalOrgGrp"
group.name.2 <- "//Sec527PolOrgs"
group.name.3 <- "//Form990ScheduleCPartI/Sec527PolOrgs"
all.versions <- paste( group.name.1, group.name.2,group.name.3, sep="|" )
# node groups for Schedule C Part IV
group.name.4 <- "//Form990ScheduleCPartIV"
group.name.5 <- "//SupplementalInformationDetail"
all.versions2 <- paste( group.name.4, group.name.5, sep="|" )
# sierra club
V1 <- "https://s3.amazonaws.com/irs-form-990/201843129349301954_public.xml"
# SOUTHERN POVERTY LAW CENTER INC , do not have data in part I-C
V2 <- "https://s3.amazonaws.com/irs-form-990/201900309349301030_public.xml"
#AMERICAN CIVIL LIBERTIES UNION FOUNDATION INC, do not have data in part I-C
V3 <- "https://s3.amazonaws.com/irs-form-990/201842749349301309_public.xml"
#National Association for the Advancement of Colored People, do not have data in part I-C
V4 <- "https://s3.amazonaws.com/irs-form-990/201813189349309906_public.xml"
#PLANNED PARENTHOOD GLOBAL INC
V5 <- "https://s3.amazonaws.com/irs-form-990/201821359349309027_public.xml"
#for table-01
list.of.tables <- list()
doc <- read_xml( V1 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions )
list.of.tables[[1]] <- nodeset_to_df( node.group )
doc <- read_xml( V2 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions )
list.of.tables[[2]] <- nodeset_to_df( node.group )
doc <- read_xml( V3 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions )
list.of.tables[[3]] <- nodeset_to_df( node.group )
doc <- read_xml( V4 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions )
list.of.tables[[4]] <- nodeset_to_df( node.group )
doc <- read_xml( V5 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions )
list.of.tables[[5]] <- nodeset_to_df( node.group )
# list.of.tables
bind_rows( list.of.tables ) payment_political_orgs <-bind_rows( list.of.tables ) ############# for table 02
list.of.tables2 <- list()
doc <- read_xml( V1 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions2 )
list.of.tables2[[1]] <- nodeset_to_df( node.group )
doc <- read_xml( V2 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions2 )
list.of.tables2[[2]] <- nodeset_to_df( node.group )
doc <- read_xml( V3 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions2 )
list.of.tables2[[3]] <- nodeset_to_df( node.group )
doc <- read_xml( V4 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions2 )
list.of.tables2[[4]] <- nodeset_to_df( node.group )
doc <- read_xml( V5 ) %>% xml_ns_strip()
node.group <- xml_find_all( doc, all.versions2 )
list.of.tables2[[5]] <- nodeset_to_df( node.group )
# list.of.tables
bind_rows( list.of.tables2 ) suplemental_info <- bind_rows( list.of.tables2 ) Once all of the group names are properly identified for each relational table we can construct the build functions for the relational databases.
The will look something like this, where “hospitals” is the index file with all Schedule H returns from a year.
# hypoethetical build file
#group.name.1 <- "//Section527PoliticalOrgGrp"
#group.name.2 <- "//Sec527PolOrgs"
#group.name.3 <- "//Form990ScheduleCPartI/Sec527PolOrgs"
#all.versions <- paste( group.name.1, group.name.2,group.name.3, sep="|" )
# node groups for Schedule C Part IV
#group.name.4 <- "//Form990ScheduleCPartIV"
#group.name.5 <- "//SupplementalInformationDetail"
#all.versions2 <- paste( group.name.4, group.name.5, sep="|" )
#i.counter <- 1
#sched.h.part.v <- list()
#for( i in hospitals$URL )
#{
#doc <- read_xml( i ) %>% xml_ns_strip()
#node.group <- xml_find_all( doc, all.versions )
#sched.h.part.v [[ i.counter ]] <- nodeset_to_df( node.group )
#i.counter <- i.counter + 1
#}
# load 2016 EFILE CORE
dat <- readRDS( gzcon( url( "https://www.dropbox.com/s/4ptsnryf64vkqlg/2016-EFILE-CORE.rds.rds?dl=1" ) ) )
head(dat) %>% pander()
# find schedule C filers
dat.c <- dplyr::filter( dat, SCHEDC=="TRUE" )
political_orgs<- head( dat.c$URL )
From the XML package - our doc is an xml2 object so not compatible.
doc <- XML::xmlParse( sierraclub )
nodes <- XML::getNodeSet( doc, "//SIERRACLUB/Section527PoliticalOrgGrp" )
XML::xmlToDataFrame( doc )XML::xmlToDataFrame( nodes )# this doesn't work - puts all in one row
# XML::xmlToList( doc ) %>% as.data.frame( stringsAsFactors=F )
# works for one node set
# XML::xmlToList( nodes[[1]] ) %>% as.data.frame( stringsAsFactors=F )Various options in various packages:
doc <- xml2::read_xml( sierraclub )
nodeset <- xml2::xml_children( doc ) # top level nodeset
xml2::xml_structure( doc )## <SIERRACLUB>
## <Section527PoliticalOrgGrp>
## <OrganizationBusinessName>
## <BusinessNameLine1Txt>
## {text}
## <USAddress>
## <AddressLine1Txt>
## {text}
## <CityNm>
## {text}
## <StateAbbreviationCd>
## {text}
## <ZIPCd>
## {text}
## <EIN>
## {text}
## <ContributionsRcvdDlvrAmt>
## {text}
xmltools::xml_get_paths( doc )## [[1]]
## [1] "/SIERRACLUB/Section527PoliticalOrgGrp"
## [2] "/SIERRACLUB/Section527PoliticalOrgGrp/EIN"
## [3] "/SIERRACLUB/Section527PoliticalOrgGrp/ContributionsRcvdDlvrAmt"
## [4] "/SIERRACLUB/Section527PoliticalOrgGrp/OrganizationBusinessName"
## [5] "/SIERRACLUB/Section527PoliticalOrgGrp/OrganizationBusinessName/BusinessNameLine1Txt"
## [6] "/SIERRACLUB/Section527PoliticalOrgGrp/USAddress"
## [7] "/SIERRACLUB/Section527PoliticalOrgGrp/USAddress/AddressLine1Txt"
## [8] "/SIERRACLUB/Section527PoliticalOrgGrp/USAddress/CityNm"
## [9] "/SIERRACLUB/Section527PoliticalOrgGrp/USAddress/StateAbbreviationCd"
## [10] "/SIERRACLUB/Section527PoliticalOrgGrp/USAddress/ZIPCd"
xmltools::xml_view_trees( doc )## ++-- Section527PoliticalOrgGrp
## +-- EIN
## +-- ContributionsRcvdDlvrAmt
## +-- OrganizationBusinessName
## ++-- BusinessNameLine1Txt
## ++-- USAddress
## +-- AddressLine1Txt
## +-- CityNm
## +-- StateAbbreviationCd
## ++-- ZIPCd
xmltools::xml_get_paths( doc, mark_terminal = "---> " )## [[1]]
## [1] "/SIERRACLUB/Section527PoliticalOrgGrp"
## [2] "---> /SIERRACLUB/Section527PoliticalOrgGrp/EIN"
## [3] "---> /SIERRACLUB/Section527PoliticalOrgGrp/ContributionsRcvdDlvrAmt"
## [4] "/SIERRACLUB/Section527PoliticalOrgGrp/OrganizationBusinessName"
## [5] "---> /SIERRACLUB/Section527PoliticalOrgGrp/OrganizationBusinessName/BusinessNameLine1Txt"
## [6] "/SIERRACLUB/Section527PoliticalOrgGrp/USAddress"
## [7] "---> /SIERRACLUB/Section527PoliticalOrgGrp/USAddress/AddressLine1Txt"
## [8] "---> /SIERRACLUB/Section527PoliticalOrgGrp/USAddress/CityNm"
## [9] "---> /SIERRACLUB/Section527PoliticalOrgGrp/USAddress/StateAbbreviationCd"
## [10] "---> /SIERRACLUB/Section527PoliticalOrgGrp/USAddress/ZIPCd"
for( i in 1:max.lev )
{
row.i <- as.character(xpath.levels[i,])
print( paste0( "LEVEL ", i, ": " ) )
print( paste0( sort(unique( row.i )), collapse="; " ) )
cat( "\n" )
}## [1] "LEVEL 1: "
## [1] "Return"
##
## [1] "LEVEL 2: "
## [1] "ReturnData"
##
## [1] "LEVEL 3: "
## [1] "IRS990ScheduleC"
##
## [1] "LEVEL 4: "
## [1] "Form990ScheduleCPartI; Sec527PolOrgs; Section527PoliticalOrgGrp"
##
## [1] "LEVEL 5: "
## [1] "AddressForeign; AddressUS; AmtOfContribsRecdDelivered; AmtPdFromInternalFunds; ContributionsRcvdDlvrAmt; EIN; ForeignAddress; NameOf527Organization; OrganizationBusinessName; PaidInternalFundsAmt; Sec527PolOrgs; USAddress"
##
## [1] "LEVEL 6: "
## [1] "; AddressForeign; AddressLine1; AddressLine1Txt; AddressLine2; AddressLine2Txt; AddressUS; AmtOfContribsRecdDelivered; AmtPdFromInternalFunds; BusinessNameLine1; BusinessNameLine1Txt; BusinessNameLine2; BusinessNameLine2Txt; City; CityNm; Country; CountryCd; EIN; ForeignPostalCd; NameOf527Organization; PostalCode; ProvinceOrState; ProvinceOrStateNm; State; StateAbbreviationCd; ZIPCd; ZIPCode"
##
## [1] "LEVEL 7: "
## [1] "; AddressLine1; AddressLine2; BusinessNameLine1; BusinessNameLine2; City; Country; PostalCode; ProvinceOrState; State; ZIPCode"
create_edgelist <- function( xpaths )
{
nodes <- strsplit( xpaths, "/" )
xpath.levels <- sapply( nodes, length )
max.lev <- max( xpath.levels )
nodes <- lapply( nodes, function(x){ c(x, rep("",max.lev-length(x) ) ) } )
xpath.levels <- data.frame( do.call( cbind, nodes ), stringsAsFactors=F )
df <- NULL
for( i in 1:ncol(xpath.levels) )
{
for( j in 1:(max.lev-1) )
{
df <- rbind( df, xpath.levels[ j:(j+1), i ] )
}
}
df <- as.data.frame(df, stringsAsFactors=F )
df <- unique( df )
df <- df[ ! grepl( "$^", df$V2 ) , ]
return( df )
}
el <- create_edgelist( xpaths.t2 )
doc <- xml2::read_xml( sierraclub )
nodeset <- xml2::xml_children( doc ) # top level nodeset
xpaths.sierraclub<- xmltools::xml_get_paths( doc ) %>% unlist() %>% unique()
xpaths.sierraclub <- gsub( "/SIERRACLUB/", "SIERRACLUB/", xpaths.sierraclub )
el <- create_edgelist( xpaths.sierraclub )library( igraph )
g <- graph_from_data_frame( d=el, directed=TRUE, vertices=NULL )
plot( g, layout=layout_as_tree )library( data.tree )
nd <- FromDataFrameNetwork( network=el )
print( nd )## levelName
## 1 SIERRACLUB
## 2 °--Section527PoliticalOrgGrp
## 3 ¦--EIN
## 4 ¦--ContributionsRcvdDlvrAmt
## 5 ¦--OrganizationBusinessName
## 6 ¦ °--BusinessNameLine1Txt
## 7 °--USAddress
## 8 ¦--AddressLine1Txt
## 9 ¦--CityNm
## 10 ¦--StateAbbreviationCd
## 11 °--ZIPCd
SetGraphStyle( nd, rankdir = "TB")
SetEdgeStyle( nd, arrowhead = "vee", color = "grey20", penwidth = 2 )
SetNodeStyle( nd,
style = "filled,rounded",
shape = "box",
fillcolor = "LightBlue",
fontname = "helvetica",
fontcolor="black",
tooltip = GetDefaultTooltip )
# SetNodeStyle(acme$IT, fillcolor = "LightBlue", penwidth = "5px")
plot( nd )library(networkD3)
net <- ToDataFrameNetwork( nd, "name" )
simpleNetwork( net[-3], fontSize = 12 )el <- create_edgelist( xpaths.t1 )
nd <- FromDataFrameNetwork( network=el )
net <- ToDataFrameNetwork( nd, "name" )
simpleNetwork( el, fontSize = 12 , linkDistance = 25, charge = -100 )